suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(readxl))
suppressPackageStartupMessages(library(hablar))
suppressPackageStartupMessages(library(tsibble))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(DT))
suppressPackageStartupMessages(library(ggplot2))
moduletwo <- read_excel( "~/Desktop/thesis_surveydata/political_factors_complete.xlsx")
save(moduletwo, file = "moduletwo.rda")
module2 <- data.frame(moduletwo)
module2 %>%
  filter(!row_number() %in% c(1,2))
# loaded data set for module 2 eco -pol variables and removes the rows with question number and question text
#This chunk contains the coding schemes of various scales used in survey one: eco-political factors, kahan scale and acceptance scale
codedmodule2 <- module2 %>%
  
#remove row 1
  filter(!row_number() %in% c(1,2)) %>% 
  
# replace risky likert scale with numbers
  mutate_at(vars(starts_with("Risky")), funs(case_when(. =="Not at all risky" ~ 1, 
                                                       . =="Slightly risky" ~ 2, 
                                                       . =="Moderately risky" ~ 3, 
                                                       . =="Very risky" ~ 4, 
                                                       . =="Extremely risky" ~ 5))) %>%

# replace beneficial likert scale with numbers  
  mutate_at(vars(starts_with("Ben")), funs(case_when(. =="Not at all beneficial" ~ 1,
                                                     . =="Slightly beneficial" ~ 2,
                                                     . =="Moderately beneficial" ~ 3,
                                                     . =="Very beneficial" ~ 4,
                                                     . =="Extremely beneficial" ~ 5 ))) %>%

# replace nuclear acceptance likert scale with numbers
  mutate_at(vars(N_accept,N_reluctantlyaccept,N_reject), funs(case_when(. == "Strongly disagree" ~ 1, 
                                                                        . == "Somewhat disagree" ~ 2,
                                                                        . == "Neither agree nor disagree" ~3,
                                                                        . == "Somewhat agree" ~ 4,
                                                                        . == "Strongly agree" ~ 5))) %>%
  
# code likert scale for variables for Kahan scale into numbers
  mutate_at(vars(starts_with (c("K_I","K_H","DISPLACE", "POLLUTE", "HEALTH", "JOBS", "BEAUTY", "PRIDE", "NPRIDE", "DEV", "PROSPER", "RELY"))), funs(case_when(. == "Strongly disagree" ~ 1, 
               . == "Somewhat disagree" ~ 2,
               . == "Neither agree nor disagree" ~3,
               . == "Somewhat agree" ~ 4,
               . == "Strongly agree" ~ 5))) %>%
  
# reverse code for likert scale for variables for Kahan scale into numbers
  mutate_at(vars(starts_with (c("K_S","K_E"))), funs(case_when(. == "Strongly disagree" ~ 5, 
                                                               . == "Somewhat disagree" ~ 4,
                                                               . == "Neither agree nor disagree" ~3,
                                                               . == "Somewhat agree" ~ 2,
                                                               . == "Strongly agree" ~ 1))) %>%

# code eco-pol scale variables into numbers
  mutate_at(vars(SYSTEMDEMO,SYSTEMRELIGION,SYSTEMTECHNO,SYSTEMTOTAL,WEALTHLIM,MECHANISATION,DECISIONDECEN,INDUSTRYLARGE,ECONOMYGLOBAL,OWNERPVT,OWNERNOREG), funs(case_when(. == "Strongly disagree" ~ 1, 
                        . == "Somewhat disagree" ~ 2,
                        . == "Neither agree nor disagree" ~3,
                        . == "Somewhat agree" ~ 4,
                        . == "Strongly agree" ~ 5))) %>%

# reverse code eco-pol scale variables into numbers 
 mutate_at(vars(DECISIONCEN, INDUSTRYSMALL, ECONOMYLOCAL, ENVOVERDEV,OWNERPUB, OWNERREG), funs(case_when(. == "Strongly disagree" ~ 5, 
                                                                                                         . == "Somewhat disagree" ~ 4,
                                                                                                         . == "Neither agree nor disagree" ~3,
                                                                                                         . == "Somewhat agree" ~ 2,
                                                                                                         . == "Strongly agree" ~ 1)))
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## Please use a list of either functions or lambdas: 
## 
##   # Simple named list: 
##   list(mean = mean, median = median)
## 
##   # Auto named with `tibble::lst()`: 
##   tibble::lst(mean, median)
## 
##   # Using lambdas
##   list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
codedmodule2
# summary statistics for risks from different energy sources

risksummary <- codedmodule2 %>%
  summarize_at(vars(starts_with(c("RISKY"))), list(~mean(., na.rm = TRUE), ~median(., na.rm = TRUE), ~sd(.,na.rm = TRUE),~var(.,na.rm = TRUE), ~n())) %>%
  #add row index so later spreading indexed correctly
  add_rownames()%>%
  #melt to long format
  gather(technology, value, -rowname) %>%
  # separate risky from variable suffix
  separate(technology, c("Risky", "var"), extra = "merge", fill = "left") %>%
  #separate mean from variable prefix
  separate(var, c("technology", "summary")) %>%
  # spread summary values back to wide form
  spread(summary,value) %>%
  #clean up
  select(-rowname, -Risky) %>%
  arrange(mean)%>%
  setNames(paste0('perceivedrisk.', names(.)))%>%
  rename(technology = perceivedrisk.technology)
## Warning: `add_rownames()` was deprecated in dplyr 1.0.0.
## Please use `tibble::rownames_to_column()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
risksummary
#plotting perceived risk from different technologies

module2%>%
  filter(!row_number() %in% c(1,2)) %>% 
  select(starts_with("RISKY"))%>%
  gather(Technology, Perceived_Risk, Risky_Hydro:Risky_INDLPG, factor_key = TRUE)%>%
  separate(Technology, c("Risky", "Technology"), extra = "merge", fill = "left")%>%
  select(-Risky) %>%
  group_by(Technology, Perceived_Risk) %>%
  summarise(n=n())
## `summarise()` has grouped output by 'Technology'. You can override using the
## `.groups` argument.
colorsequence<- c("#8C4646","#D9695F","#F2A679","#F2D091","#5D8C7B","#808080")
df_barplot<-module2%>%
  filter(!row_number() %in% c(1,2)) %>% 
  select(starts_with("RISKY"))%>%
  gather(Technology, Perceived_Risk, Risky_Hydro:Risky_INDLPG, factor_key = TRUE)%>%
  separate(Technology, c("Risky", "Technology"), extra = "merge", fill = "left")%>%
  select(-Risky) %>% 
  group_by(Technology, Perceived_Risk) %>%
  summarise(n=n())
## `summarise()` has grouped output by 'Technology'. You can override using the
## `.groups` argument.
df_barplot %>% ggplot(aes(fill= fct_rev(factor(Perceived_Risk, levels=c("Extremely risky", "Very risky","Moderately risky","Slightly risky","Not at all risky","Rather not say/Don't know"))), y=n , x=Technology)) + 
  geom_bar(position="fill",stat="identity")+
  scale_fill_manual("legend", values = c("Extremely risky" = "#8C4646", "Very risky" = "#D9695F", "Moderately risky" = "#F2A679", "Slightly risky" = "#F2D091","Not at all risky" = "#5D8C7B", "Rather not say/Don't know" = "#808080"))+
  coord_flip()+
  theme_classic()

# fix ugly visual. Rearrange from extremely risky to not at all risky. Red to green, rather not say/Don't know should be grey. Also facet wrap by INDividually owned and centrally owned technology.
# summary statistics for benefits from different energy sources

benefitsummary <- codedmodule2 %>%
  summarize_at(vars(starts_with(c("Ben"))), list(~mean(., na.rm = TRUE), ~median(., na.rm = TRUE), ~sd(.,na.rm = TRUE),~var(.,na.rm = TRUE), ~n())) %>%
  #add row index so later spreading indexed correctly
  add_rownames()%>%
  #melt to long format
  gather(technology, value, -rowname) %>%
  # separate Ben from variable suffix
  separate(technology, c("Ben", "var"), extra = "merge", fill = "left") %>%
  #separate mean from variable prefix
  separate(var, c("technology", "summary")) %>%
  # spread summary values back to wide form
  spread(summary,value) %>%
  #clean up
  select(-rowname, -Ben) %>%
  arrange(mean)%>%
  setNames(paste0('perceivedbenefit.', names(.))) %>%
  rename(technology = perceivedbenefit.technology)
benefitsummary
#round off to two decimal places
colorsequence<- c("#8C4646","#D9695F","#F2A679","#F2D091","#5D8C7B","#808080")
df_barplotben <- module2%>%
  filter(!row_number() %in% c(1,2)) %>% 
  select(starts_with("Ben"))%>%
  gather(Technology, Perceived_Benefit, Ben_Hydro:Ben_INDLPG, factor_key = TRUE)%>%
  separate(Technology, c("Ben", "Technology"), extra = "merge", fill = "left")%>%
  select(-Ben) %>% 
  group_by(Technology, Perceived_Benefit) %>%
  summarise(n=n())
## `summarise()` has grouped output by 'Technology'. You can override using the
## `.groups` argument.
df_barplotben %>% ggplot(aes(fill= factor(Perceived_Benefit, levels=c("Extremely beneficial", "Very beneficial","Moderately beneficial","Slightly beneficial","Not at all beneficial","Rather not say/Don't know")), y=n , x=Technology)) + 
  geom_bar(position="fill",stat="identity")+
  scale_fill_manual("legend", values = c("Extremely beneficial" = "#5D8C7B", "Very beneficial" = "#F2D091", "Moderately beneficial" = "#F2A679", "Slightly beneficial" = "#D9695F","Not at all beneficial" = "#8C4646", "Rather not say/Don't know" = "#808080"))+
  coord_flip()+
  theme_classic()

# fix ugly visual. Rearrange from extremely beneficial to not at all beneficial. Green to red, rather not say/Don't know should be grey. Also facet wrap by INDividually owned and centrally owned technology. Also arrange descending from most beneficial to least beneficial. 
#summary statistics for nuclear energy accept, reject and reluctantly accept scale
codedmodule2 %>%
  summarize_at(vars(N_accept,N_reject,N_reluctantlyaccept), list(~mean(., na.rm = TRUE),~median(., na.rm = TRUE), ~sd(.,na.rm = TRUE),~var(.,na.rm = TRUE),~n())) %>%
  add_rownames() %>%
  gather(acceptance, value, -rowname) %>%
  separate(acceptance, c("N_accept", "var") ,  extra = "merge", fill = "left") %>%
  separate(var, c("Acceptance", "summary")) %>%
  spread(summary, value) %>%
  select(-rowname, -N_accept)%>%
  arrange(mean)
# chi square test for risk perception from nuclear energy and gender, race 

# first combine gender and caste variables

chitest <- codedmodule2 %>%
  select(Risky_Nuclear,Risky_Hydro, Risky_Hydro, Risky_Solar,Urban_Rural, Gender, Caste) %>%
  na.omit() %>%
  filter(!Caste=="Rather not say/Don't know")%>%
  unite(gendercaste, Gender, Caste, sep = "_", remove = FALSE)

chitest
full_join(risksummary,benefitsummary, by = NULL, copy = FALSE, keep = FALSE) %>%
  select(technology, perceivedrisk.mean, perceivedbenefit.mean) %>%
  mutate(technology = fct_reorder(technology, perceivedrisk.mean, .desc = TRUE))%>%
   gather(key = "level", value = "mean",
       perceivedrisk.mean, perceivedbenefit.mean) %>%
  ggplot(aes(x=technology, y= mean, fill = level, ))+
  geom_bar(stat="identity", position=position_dodge())+
  ylim(0,4)+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  scale_fill_manual("legend", values = c("perceivedbenefit.mean" = "palegreen3", "perceivedrisk.mean" = "firebrick2"))
## Joining, by = "technology"

# arrange descending and facet wrap by IND prefix 
full_join(risksummary,benefitsummary, by = NULL, copy = FALSE, keep = FALSE) %>%
  select(technology, perceivedrisk.mean, perceivedbenefit.mean) %>%
  mutate(technology = fct_reorder(technology, perceivedbenefit.mean, .desc = TRUE))%>%
   gather(key = "level", value = "mean",
       perceivedrisk.mean, perceivedbenefit.mean) %>%
  ggplot(aes(x=technology, y= mean, fill = level, ))+
  geom_bar(stat="identity", position=position_dodge())+
  ylim(0,4)+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  scale_fill_manual("legend", values = c("perceivedbenefit.mean" = "palegreen3", "perceivedrisk.mean" = "firebrick2"))
## Joining, by = "technology"

full_join(risksummary,benefitsummary, by = NULL, copy = FALSE, keep = FALSE) %>%
  select(technology, perceivedrisk.median, perceivedbenefit.median) %>%
  mutate(technology = fct_reorder(technology, perceivedrisk.median, .desc = TRUE))%>%
   gather(key = "level", value = "median",
       perceivedrisk.median, perceivedbenefit.median) %>%
  ggplot(aes(x=technology, y= median, fill = level, ))+
  geom_bar(stat="identity", position=position_dodge())+
  ylim(0,4)+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  scale_fill_manual("legend", values = c("perceivedbenefit.median" = "palegreen3", "perceivedrisk.median" = "firebrick2"))
## Joining, by = "technology"

# arrange descending and facet wrap by IND prefix 

  
# arrange descending and facet wrap by IND prefix 
 #median score for Kahan invidualism scale 

codedmodule2 %>%
  select((starts_with(c("K_I", "K_S"))))%>%
  gather(key = "K_I", value = "score")%>%
  na.omit()%>%
  summarise(median = median(c(score)))
# Kahan scale median score for Hierarchy scale
codedmodule2 %>%
  select((starts_with(c("K_H", "K_E"))))%>%
  gather(key = "K_H", value = "score")%>%
  na.omit()%>%
  summarise(median = median(c(score)))
#calculating individualism score for each respondent

Kscalescores <- codedmodule2 %>%
  select(starts_with(c("Risky","K_I","K_S", "K_H", "K_E")))%>%
  rowwise()%>%
  na.omit()%>%
  mutate(Individualism_score = mean(c_across(K_IINTRFER:K_SPROTECT)))%>%
  mutate(Hierarchy_score = mean(c_across(K_HEQUAL:K_ERADEQ2)))%>%
  select(!starts_with(c("K_I","K_S", "K_H", "K_E")))

Kscalescores
#scatter plot of Kahan scale scores around the median scores on Individualism and Hierarchy scales. 

Kscalescores %>%
  ggplot(aes(Individualism_score, Hierarchy_score))+
  geom_point()+
  geom_hline(yintercept=2, colour="black", lwd=1)+
  geom_vline(xintercept=3, colour="black", lwd=1)

# checking distribution of scores on Individualism scale 

Kscalescores %>%
  ggplot(aes(x=Individualism_score, y = ..count..), fill = "lightgray")+
  geom_density()

# checking distribution of scores on Hierarchy scale 
Kscalescores %>%
  ggplot(aes(x=Hierarchy_score, y = ..count..), fill = "lightgray")+
  geom_density()

chisq.test(chitest$Risky_Nuclear, chitest$Urban_Rural)
## 
##  Pearson's Chi-squared test
## 
## data:  chitest$Risky_Nuclear and chitest$Urban_Rural
## X-squared = 16.389, df = 4, p-value = 0.002539